import urllib
import string
import math

'''canlii search result page'''

url="http://www.canlii.com/eliisa/search.do?jurisdiction=nu&legislation=legislation&caselaw=courts&boardTribunal=tribunals&language=en&searchTitle=Nunavut&searchPage=eliisa%2FjurisdictionSearch.vm&sortOrder=relevance&requestedPage=1"

# Markup fragments that surround the total-result count on a search page.
_COUNT_PREFIX = '<span class="size"><span id="topResultCount">'
_COUNT_SUFFIX = '</span> results</span>'
# Substring that marks a line as containing a link back into canlii.com.
_LINK_MARKER = 'href="http://www.canlii.com'
# Page size assumed by the page-count calculation.
_RESULTS_PER_PAGE = 25


def _as_text(line):
    """Return *line* as text, decoding it only when it is a bytes object.

    On Python 2 ``bytes`` is ``str``, so lines pass through untouched; on
    Python 3 the response yields ``bytes`` that must be decoded before they
    can be compared with the text markers.
    """
    if isinstance(line, bytes) and not isinstance(line, str):
        # The page encoding is not known from here; the markers are ASCII,
        # so undecodable bytes are replaced rather than raising.
        return line.decode("utf-8", "replace")
    return line


def _open_url(address):
    """Open *address* with whichever ``urlopen`` this interpreter provides."""
    try:
        from urllib.request import urlopen  # Python 3
    except ImportError:
        from urllib import urlopen  # Python 2
    return urlopen(address)


def parse_result_count(lines):
    """Return the total number of search results announced in *lines*.

    Args:
        lines: iterable of text or bytes lines of a search result page.

    Returns:
        The integer found between the ``topResultCount`` span markers, or 0
        when no line carries them. If several lines match, the last wins.

    Raises:
        ValueError: if the text between the markers is not an integer.
    """
    total = 0
    for raw in lines:
        line = _as_text(raw).strip()
        if _COUNT_PREFIX in line:
            digits = line.replace(_COUNT_PREFIX, "").replace(_COUNT_SUFFIX, "")
            total = int(digits)
    return total


def extract_addresses(lines):
    """Collect one link entry from every line that links to canlii.com.

    For each matching line every ``<a href=`` is removed and the text up to
    the first ``>`` is kept. When the line has no ``>`` the whole remainder
    is kept (``str.find`` returns -1 there, which must not be used as a
    slice end or the last character is lost).

    NOTE(review): the surrounding double quotes and any markup preceding the
    anchor are retained, exactly as the script always produced them; strip
    them here if bare URLs are wanted.

    Args:
        lines: iterable of text or bytes lines of a search result page.

    Returns:
        A list of strings, one per matching line, in input order.
    """
    found = []
    for raw in lines:
        line = _as_text(raw).strip()
        if _LINK_MARKER not in line:
            continue
        remainder = line.replace('<a href=', "")
        end = remainder.find(">")
        found.append(remainder if end == -1 else remainder[:end])
    return found


if __name__ == "__main__":
    # Page 1 is fetched first only to learn how many result pages exist.
    webpage = _open_url(url)
    try:
        number_of_results = parse_result_count(webpage)
    finally:
        # Close even when parsing fails so the connection is not leaked.
        webpage.close()
    number_of_pages = int(math.ceil(number_of_results / float(_RESULTS_PER_PAGE)))

    # URL template: the page number is appended to the end.
    url="http://www.canlii.com/eliisa/search.do?jurisdiction=nu&legislation=legislation&caselaw=courts&boardTribunal=tribunals&language=en&searchTitle=Nunavut&searchPage=eliisa%2FjurisdictionSearch.vm&sortOrder=relevance&requestedPage="

    # Walk every result page and gather the link entries it contains.
    webaddresses = []
    for i in range(1, number_of_pages + 1):
        new_url = url + str(i)
        webpage = _open_url(new_url)
        try:
            webaddresses.extend(extract_addresses(webpage))
        finally:
            webpage.close()
    
        
    
    
        